#!/usr/bin/env python
# -*- coding:utf-8 -*-

"""
@author zyx
@since 2022/1/29 17:32
@file: c05_bs案例_三国演义.py
"""

from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/97.0.4692.71 Safari/537.36'
}
# Index page that lists every chapter of the book.
main_url = 'https://www.shicimingju.com/book/sanguoyanyi.html'
# Seconds to wait on each request. requests applies no timeout by default,
# so without this a stalled server would hang the script forever.
REQUEST_TIMEOUT = 10

# Fetch the index page source.
response = requests.get(
    url=main_url, headers=headers, timeout=REQUEST_TIMEOUT
)
# Fail fast on an HTTP error instead of parsing an error page.
response.raise_for_status()
response.encoding = 'utf-8'
page_text = response.text
# Echo the raw index HTML to stdout for inspection.
print(page_text)
# Parse the index: each anchor carries a chapter title and its detail link.
soup = BeautifulSoup(page_text, 'lxml')
a_list = soup.select('.book-mulu > ul > li > a')
# The context manager closes the output file even when a request or a
# parsing step raises part-way through the loop.
with open('./sanguo.txt', 'w', encoding='utf-8') as fp:
    for a in a_list:
        # Chapter title. get_text() always returns a str, whereas .string
        # is None when the anchor holds more than one child node.
        title = a.get_text()
        # Detail page address. urljoin resolves root-relative,
        # page-relative and already-absolute hrefs against the index URL.
        detail_url = urljoin(main_url, a['href'])
        # Fetch the detail page source.
        response = requests.get(
            url=detail_url, headers=headers, timeout=REQUEST_TIMEOUT
        )
        response.raise_for_status()
        response.encoding = 'utf-8'
        detail_page_text = response.text
        # Parse the chapter body out of the detail page.
        d_soup = BeautifulSoup(detail_page_text, 'lxml')
        div_tag = d_soup.find('div', class_='chapter_content')
        if div_tag is None:
            # find() returns None when the container is absent; skip this
            # chapter with a notice rather than crash on `.text`.
            print(title, '未找到章节内容，已跳过！')
            continue
        content = div_tag.text  # chapter text
        fp.write(title + ':' + content + '\n')
        print(title, '爬取保存成功！')
